import os
import glob
import fitz
import openpyxl


def get_toc(file):
    """Return the table of contents (bookmarks) of a document.

    Args:
        file: Path of the document to open with PyMuPDF (``fitz``).

    Returns:
        The list produced by ``Document.get_toc()``. Callers in this file
        unpack each entry as ``(level, title, page)``.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even when get_toc() raises; important when looping over many
    # PDFs in one run.
    with fitz.open(file) as doc:
        return doc.get_toc()


def toc_to_excel(toc, xlsx_file):
    """Write a table of contents to a new workbook, one entry per row.

    Each entry of *toc* is unpacked as ``(level, title, page)``. The title is
    placed in the column whose index equals the level, and the page number in
    the column immediately to its right, so nesting depth shows up as
    horizontal indentation in the sheet.

    Args:
        toc: Iterable of three-item entries ``(level, title, page)``.
        xlsx_file: Destination path passed to ``Workbook.save``.
    """
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.title = "bookmark"

    # Spreadsheet rows are 1-based, so start counting at 1.
    for row_number, (level, heading, page_number) in enumerate(toc, start=1):
        heading_cell = worksheet.cell(row=row_number, column=level)
        heading_cell.value = heading
        page_cell = worksheet.cell(row=row_number, column=level + 1)
        page_cell.value = page_number

    workbook.save(xlsx_file)


def extract_bookmarks(pdf_folder, excel_folder):
    """Export the bookmarks of every PDF under *pdf_folder* to workbooks.

    The folder is searched recursively for ``*.pdf`` files. For each match a
    workbook is written below *excel_folder* at the same relative location,
    with the file extension replaced by ``.xlsx``. Missing output directories
    are created. A ``source --> target`` line is printed per file.

    Args:
        pdf_folder: Root directory to search for PDF files.
        excel_folder: Root directory that receives the generated workbooks.
    """
    # Build the pattern with os.path.join so it works with any path separator,
    # and escape the folder so glob metacharacters in its name (e.g. "[1]")
    # are matched literally.
    pattern = os.path.join(glob.escape(pdf_folder), '**', '*.pdf')
    for file in glob.glob(pattern, recursive=True):
        # Derive the target from the path relative to the input root instead
        # of str.replace(): replace() would also rewrite later occurrences of
        # the folder string or of ".pdf" inside directory names, and would
        # miss an upper-case ".PDF" extension on case-insensitive filesystems.
        relative_stem, _ = os.path.splitext(os.path.relpath(file, pdf_folder))
        excel_file = os.path.join(excel_folder, relative_stem + '.xlsx')
        os.makedirs(os.path.dirname(excel_file), exist_ok=True)
        toc_to_excel(get_toc(file), excel_file)
        print(file, '-->', excel_file)


if __name__ == '__main__':
    # Script entry point: convert the sample input tree into bookmark workbooks.
    extract_bookmarks(
        pdf_folder=r'D:\dl\数据样本\输入文件',
        excel_folder=r'D:\dl\bkmk',
    )
